CentOS 7 : SQL Server 2017 : Always On AG : Server World

SQL Server 2017 : Always On Availability Group

2017/10/22

Configure Always On Availability Group for SQL Server on Linux.

This example is based on the environment like follows.

                                         |
                                         |VIP:10.0.0.50
           +-----------------------------+-----------------------------+
           |10.0.0.51                    |10.0.0.52                    |10.0.0.53
+----------+-----------+      +----------+-----------+      +----------+-----------+
| [ node01.srv.world ] |      | [ node02.srv.world ] |      | [ node03.srv.world ] |
|      (Primary)       |      |      (Secondary)     |      |      (Secondary)     |
|      SQL Server      |      |       SQL Server     |      |       SQL Server     |
+----------------------+      +----------------------+      +----------------------+

[1]	Configure basic settings of Cluster, refer to here.
[2]	Install SQL Server on all nodes, refer to here.
[3]	Install SQL Server for HA package and Enable Always On Availability Group on all nodes. Furthermore, create a user for mirroring on all nodes. Replace 'password' to strong one.

[root@node01 ~]#

yum -y install mssql-server-ha

[root@node01 ~]#

/opt/mssql/bin/mssql-conf set hadr.hadrenabled 1

SQL Server needs to be restarted in order to apply this setting. Please run
'systemctl restart mssql-server.service'.
[root@node01 ~]#

systemctl restart mssql-server

[root@node01 ~]#

sqlcmd -S localhost -U SA

Password:

1> alter event session AlwaysOn_health on server with (startup_state=on);
2> go
1> create login dbm_login with password = 'password';
2> create user dbm_user for login dbm_login;
3> go
1> exit

[4]	Create certificates on Primary node and copy them to other secondary nodes. Replace 'password', 'pvk_password' to strong one.

[root@node01 ~]#

sqlcmd -S localhost -U SA

Password:

1> create master key encryption by password = 'password';
2> create certificate dbm_certificate with subject = 'dbm';
3> backup certificate dbm_certificate
4> to file = '/var/opt/mssql/data/dbm_certificate.cer'
5> with private key (
6> file = '/var/opt/mssql/data/dbm_certificate.pvk',
7> encryption by password = 'pvk_password'
8> );
9> go
1> exit

[root@node01 ~]#

cd /var/opt/mssql/data

[root@node01 data]#

scp dbm_certificate.* root@node02.srv.world:/var/opt/mssql/data/

root@node02.srv.world's password:
dbm_certificate.cer                           100%  667   614.7KB/s   00:00
dbm_certificate.pvk                           100% 1212     1.0MB/s   00:00

[root@node01 data]#

scp dbm_certificate.* root@node03.srv.world:/var/opt/mssql/data/

root@node03.srv.world's password:
dbm_certificate.cer                           100%  667   581.6KB/s   00:00
dbm_certificate.pvk                           100% 1212   935.1KB/s   00:00

[root@node01 data]#

ssh root@node02.srv.world 'chown mssql. /var/opt/mssql/data/dbm_certificate.*'

[root@node01 data]#

ssh root@node03.srv.world 'chown mssql. /var/opt/mssql/data/dbm_certificate.*'

[5]	Create certificates on all secondary nodes. Replace 'password' to strong one. For 'pvk_password', specify the same password you set on Primary node above.

[root@node02 ~]#

sqlcmd -S localhost -U SA

Password:

1> create master key encryption by password = 'password';
2> create certificate dbm_certificate authorization dbm_user
3> from file = '/var/opt/mssql/data/dbm_certificate.cer'
4> with private key (
5> file = '/var/opt/mssql/data/dbm_certificate.pvk',
6> decryption by password = 'pvk_password'
7> );
8> go
1> exit

[6]	Set Endpoint for mirroring on all nodes.

[root@node01 ~]#

sqlcmd -S localhost -U SA

Password:

1> create endpoint [Hadr_endpoint]
2> as tcp (listener_ip = (0.0.0.0), listener_port = 5022)
3> for data_mirroring (
4> role = all,
5> authentication = certificate dbm_certificate,
6> encryption = required algorithm aes
7> );
8> alter endpoint [Hadr_endpoint] state = started;
9> grant connect on endpoint::[Hadr_endpoint] to [dbm_login];
10> go
1> exit

# if Firewalld is running, allow the port

[root@node01 ~]#

firewall-cmd --add-port=5022/tcp --permanent

[root@node01 ~]#

firewall-cmd --reload

[7]	Create Availability Group on primary node.

[root@node01 ~]#

sqlcmd -S localhost -U SA

Password:

1> create availability group [AG01]
2> with (db_failover = on, cluster_type = external)
3> for replica on
4> N'node01' with (
5> endpoint_url = N'tcp://10.0.0.51:5022',
6> availability_mode = synchronous_commit,
7> failover_mode = external,
8> seeding_mode = automatic
9> ),
10> N'node02' with (
11> endpoint_url = N'tcp://10.0.0.52:5022',
12> availability_mode = synchronous_commit,
13> failover_mode = external,
14> seeding_mode = automatic
15> ),
16> N'node03' with (
17> endpoint_url = N'tcp://10.0.0.53:5022',
18> availability_mode = synchronous_commit,
19> failover_mode = external,
20> seeding_mode = automatic
21> );
22> alter availability group [AG01] grant create any database;
23> go
1> exit

[8]	Join in Availability Group on all secondary nodes.

[root@node02 ~]#

sqlcmd -S localhost -U SA

Password:

1> alter availability group [AG01] join with (cluster_type = external);
2> alter availability group [AG01] grant create any database;
3> go
1> exit

[9]	Add Database to Availability Group on primary node.

[root@node01 ~]#

sqlcmd -S localhost -U SA

Password:

1> create database [AG01_DB];
2> alter database [AG01_DB] set recovery full;
3> backup database [AG01_DB] to disk = N'/var/opt/mssql/data/AG01_DB.bak';
4> alter availability group [AG01] add database [AG01_DB];
5> go
Processed 304 pages for database 'AG01_DB', file 'AG01_DB' on file 1.
Processed 3 pages for database 'AG01_DB', file 'AG01_DB_log' on file 1.
BACKUP DATABASE successfully processed 307 pages in 0.147 seconds (16.315 MB/sec).
1> exit

# verify possible to see the database on secondary nodes

[root@node01 ~]#

sqlcmd -S node02 -U SA -Q 'select name, create_date from sys.databases where name = "AG01_DB"'

Password:

name                         create_date
---------------------------- -----------------------
AG01_DB                      2017-10-22 11:39:47.937

(1 rows affected)

[10]	Add an account for Pacemaker on all nodes.

[root@node01 ~]#

sqlcmd -S localhost -U SA

Password:


1> use master;
2> go

# add [hacluster] user
1> create login hacluster with PASSWORD= N'password';
2> go

# add [sysadmin] role
1> alter server role [sysadmin] add member hacluster;
2> go
1> exit

[root@node01 ~]#

echo 'hacluster' > /var/opt/mssql/secrets/passwd

[root@node01 ~]#

echo 'password' >> /var/opt/mssql/secrets/passwd

[root@node01 ~]#

chown root. /var/opt/mssql/secrets/passwd

[root@node01 ~]#

chmod 600 /var/opt/mssql/secrets/passwd

[11]	Add SQL Server resource on primary node.

# disable STONITH (Shoot The Other Node In The Head) option

[root@node01 ~]#

pcs property set stonith-enabled=false

# disable start-failure-is-fatal option

[root@node01 ~]#

pcs property set start-failure-is-fatal=false

[root@node01 ~]#

pcs resource create AG01_Cluster ocf:mssql:ag ag_name=AG01 --master meta notify=true

Warning: flag '--master' is deprecated, use keyword 'master' instead (see the usage)
[root@node01 ~]#

pcs resource

 Master/Slave Set: AG01_Cluster-master [AG01_Cluster]
     Slaves: [ node01.srv.world node02.srv.world node03.srv.world ]

# set required_synchronized_secondaries_to_commit=1

[root@node01 ~]#

pcs resource update AG01_Cluster required_synchronized_secondaries_to_commit=1

# Add Virtula IP address

[root@node01 ~]#

pcs resource create VIP ocf:heartbeat:IPaddr2 ip=10.0.0.50

[root@node01 ~]#

pcs constraint colocation add VIP AG01_Cluster-master INFINITY with-rsc-role=Master

[root@node01 ~]#

pcs constraint order promote AG01_Cluster-master then start VIP

# make node01 be primary

[root@node01 ~]#

pcs resource move AG01_Cluster-master node01.srv.world --master

# few seconds later, resources starts

[root@node01 ~]#

pcs resource

 Master/Slave Set: AG01_Cluster-master [AG01_Cluster]
     Masters: [ node01.srv.world ]
     Slaves: [ node02.srv.world node03.srv.world ]
 VIP    (ocf::heartbeat:IPaddr2):       Started node01.srv.world

[12]	To run failover manually, run [pcs resource move] like follows.

# make node02 be primary

[root@node01 ~]#

pcs resource move AG01_Cluster-master node02.srv.world --master

# few seconds later, failover will be finished

[root@node01 ~]#

pcs resource

 Master/Slave Set: AG01_Cluster-master [AG01_Cluster]
     Masters: [ node02.srv.world ]
     Slaves: [ node01.srv.world node03.srv.world ]
 VIP    (ocf::heartbeat:IPaddr2):       Started node02.srv.world

# verify to access to the virtual IP address

[root@node01 ~]#

sqlcmd -S 10.0.0.50 -U SA -Q 'select @@servername'

Password:


--------------------------------------
node02

(1 rows affected)